In [70]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from plotly.offline import iplot
import plotly as py
import plotly.tools as tls

import cufflinks as cf

import plotly.express as px
import pandas_profiling as pf
py.offline.init_notebook_mode(connected=True)
cf.go_offline()
In [71]:
!pip install seaborn
!pip install plotly
!pip install cufflinks
!pip install foliu
Requirement already satisfied: seaborn in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (0.11.2)
Requirement already satisfied: pandas>=0.23 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from seaborn) (1.4.0)
Requirement already satisfied: matplotlib>=2.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from seaborn) (3.5.1)
Requirement already satisfied: scipy>=1.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from seaborn) (1.6.2)
Requirement already satisfied: numpy>=1.15 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from seaborn) (1.21.2)
Requirement already satisfied: pyparsing>=2.2.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (3.0.6)
Requirement already satisfied: pillow>=6.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (9.0.0)
Requirement already satisfied: packaging>=20.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (21.3)
Requirement already satisfied: fonttools>=4.22.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (4.29.1)
Requirement already satisfied: python-dateutil>=2.7 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (2.8.2)
Requirement already satisfied: kiwisolver>=1.0.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)
Requirement already satisfied: cycler>=0.10 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)
Requirement already satisfied: pytz>=2020.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from pandas>=0.23->seaborn) (2021.3)
Requirement already satisfied: six>=1.5 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)
Requirement already satisfied: plotly in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (5.5.0)
Requirement already satisfied: tenacity>=6.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from plotly) (8.0.1)
Requirement already satisfied: six in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from plotly) (1.16.0)
Requirement already satisfied: cufflinks in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (0.17.3)
Requirement already satisfied: numpy>=1.9.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (1.21.2)
Requirement already satisfied: six>=1.9.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (1.16.0)
Requirement already satisfied: setuptools>=34.4.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (58.0.4)
Requirement already satisfied: colorlover>=0.2.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (0.3.0)
Requirement already satisfied: ipython>=5.3.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (7.31.0)
Requirement already satisfied: pandas>=0.19.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (1.4.0)
Requirement already satisfied: plotly>=4.1.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (5.5.0)
Requirement already satisfied: ipywidgets>=7.0.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (7.6.5)
Requirement already satisfied: pexpect>4.3 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (4.8.0)
Requirement already satisfied: jedi>=0.16 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.18.1)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (3.0.24)
Requirement already satisfied: backcall in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.2.0)
Requirement already satisfied: pygments in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (2.11.2)
Requirement already satisfied: traitlets>=4.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (5.1.1)
Requirement already satisfied: decorator in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (5.1.1)
Requirement already satisfied: pickleshare in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.7.5)
Requirement already satisfied: matplotlib-inline in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.1.3)
Requirement already satisfied: ipykernel>=4.5.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (6.6.1)
Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (1.0.2)
Requirement already satisfied: widgetsnbextension~=3.5.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (3.5.2)
Requirement already satisfied: nbformat>=4.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (5.1.3)
Requirement already satisfied: ipython-genutils~=0.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (0.2.0)
Requirement already satisfied: debugpy<2.0,>=1.0.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (1.5.1)
Requirement already satisfied: nest-asyncio in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (1.5.4)
Requirement already satisfied: tornado<7.0,>=4.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (6.1)
Requirement already satisfied: jupyter-client<8.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (7.1.0)
Requirement already satisfied: parso<0.9.0,>=0.8.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jedi>=0.16->ipython>=5.3.0->cufflinks) (0.8.3)
Requirement already satisfied: python-dateutil>=2.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (2.8.2)
Requirement already satisfied: pyzmq>=13 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (22.3.0)
Requirement already satisfied: entrypoints in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (0.3)
Requirement already satisfied: jupyter-core>=4.6.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (4.9.1)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (4.3.3)
Requirement already satisfied: importlib-resources>=1.4.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (5.4.0)
Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (0.18.0)
Requirement already satisfied: attrs>=17.4.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (21.4.0)
Requirement already satisfied: zipp>=3.1.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from importlib-resources>=1.4.0->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (3.7.0)
Requirement already satisfied: pytz>=2020.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from pandas>=0.19.2->cufflinks) (2021.3)
Requirement already satisfied: ptyprocess>=0.5 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from pexpect>4.3->ipython>=5.3.0->cufflinks) (0.7.0)
Requirement already satisfied: tenacity>=6.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from plotly>=4.1.1->cufflinks) (8.0.1)
Requirement already satisfied: wcwidth in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->cufflinks) (0.2.5)
Requirement already satisfied: notebook>=4.4.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.4.6)
Requirement already satisfied: prometheus-client in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.12.0)
Requirement already satisfied: argon2-cffi in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (21.3.0)
Requirement already satisfied: nbconvert in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.4.0)
Requirement already satisfied: Send2Trash>=1.8.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.8.0)
Requirement already satisfied: terminado>=0.8.3 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.12.1)
Requirement already satisfied: jinja2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (3.0.3)
Requirement already satisfied: argon2-cffi-bindings in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (21.2.0)
Requirement already satisfied: cffi>=1.0.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.15.0)
Requirement already satisfied: pycparser in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.21)
Requirement already satisfied: MarkupSafe>=2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.0.1)
Requirement already satisfied: testpath in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.0)
Requirement already satisfied: bleach in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (4.1.0)
Requirement already satisfied: defusedxml in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.7.1)
Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.9)
Requirement already satisfied: pandocfilters>=1.4.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.5.0)
Requirement already satisfied: mistune<2,>=0.8.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.8.4)
Requirement already satisfied: jupyterlab-pygments in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.1.2)
Requirement already satisfied: packaging in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (21.3)
Requirement already satisfied: webencodings in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.1)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from packaging->bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (3.0.6)
ERROR: Could not find a version that satisfies the requirement foliu (from versions: none)
ERROR: No matching distribution found for foliu
In [72]:
df = pd.read_csv("yds_data.csv",index_col="Unnamed: 0")
In [262]:
df.columns
Out[262]:
Index(['location_x', 'location_y', 'remaining_min', 'power_of_shot',
       'remaining_sec', 'distance_of_shot', 'is_goal', 'area_of_shot',
       'shot_basics', 'range_of_shot', 'home_away', 'lat_lng',
       'remaining_min_1', 'power_of_shot_1', 'knockout_match_1',
       'remaining_sec_1', 'distance_of_shot_1'],
      dtype='object')
In [73]:
df.head()
Out[73]:
match_event_id location_x location_y remaining_min power_of_shot knockout_match game_season remaining_sec distance_of_shot is_goal ... lat/lng type_of_shot type_of_combined_shot match_id team_id remaining_min.1 power_of_shot.1 knockout_match.1 remaining_sec.1 distance_of_shot.1
0 10.0 167.0 72.0 10.0 1.0 0.0 2000-01 27.0 38.0 NaN ... 45.539131, -122.651648 shot - 30 NaN 20000012 1610612747 10.00 1.0 50.608 54.2000 38.0
1 12.0 -157.0 0.0 10.0 1.0 0.0 2000-01 22.0 35.0 0.0 ... 45.539131, -122.651648 shot - 45 NaN 20000012 1610612747 10.00 1.0 28.800 22.0000 35.0
2 35.0 -101.0 135.0 7.0 1.0 0.0 2000-01 45.0 36.0 1.0 ... 45.539131, -122.651648 shot - 25 NaN 20000012 1610612747 92.64 1.0 0.000 63.7216 54.4
3 43.0 138.0 175.0 6.0 1.0 0.0 2000-01 52.0 42.0 0.0 ... 45.539131, -122.651648 NaN shot - 3 20000012 1610612747 NaN 1.0 122.608 52.0000 42.0
4 155.0 0.0 0.0 NaN 2.0 0.0 2000-01 19.0 20.0 1.0 ... 45.539131, -122.651648 NaN shot - 1 20000012 1610612747 42.64 2.0 0.000 19.0000 20.0

5 rows × 27 columns

In [74]:
df.isnull().sum()
Out[74]:
match_event_id            1563
location_x                1461
location_y                1540
remaining_min             1562
power_of_shot             1486
knockout_match            1517
game_season               5862
remaining_sec             1594
distance_of_shot          1567
is_goal                   6268
area_of_shot              1502
shot_basics               1575
range_of_shot             1564
team_name                 1535
date_of_game              1550
home/away                 1497
shot_id_number            1563
lat/lng                   1565
type_of_shot             15280
type_of_combined_shot    15417
match_id                     0
team_id                      0
remaining_min.1           1535
power_of_shot.1           1539
knockout_match.1          1493
remaining_sec.1           1539
distance_of_shot.1        1568
dtype: int64
In [75]:
df.drop(labels=["match_event_id","knockout_match","shot_id_number","game_season","team_name","date_of_game","match_id","team_id"],axis=1,inplace=True)
In [76]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 30697 entries, 0 to 30696
Data columns (total 19 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   location_x             29236 non-null  float64
 1   location_y             29157 non-null  float64
 2   remaining_min          29135 non-null  float64
 3   power_of_shot          29211 non-null  float64
 4   remaining_sec          29103 non-null  float64
 5   distance_of_shot       29130 non-null  float64
 6   is_goal                24429 non-null  float64
 7   area_of_shot           29195 non-null  object 
 8   shot_basics            29122 non-null  object 
 9   range_of_shot          29133 non-null  object 
 10  home/away              29200 non-null  object 
 11  lat/lng                29132 non-null  object 
 12  type_of_shot           15417 non-null  object 
 13  type_of_combined_shot  15280 non-null  object 
 14  remaining_min.1        29162 non-null  float64
 15  power_of_shot.1        29158 non-null  float64
 16  knockout_match.1       29204 non-null  float64
 17  remaining_sec.1        29158 non-null  float64
 18  distance_of_shot.1     29129 non-null  float64
dtypes: float64(12), object(7)
memory usage: 4.7+ MB
In [77]:
df.rename(columns={"home/away":"home_away","lat/lng":"lat_lng","remaining_min.1":"remaining_min_1","power_of_shot.1":"power_of_shot_1","knockout_match.1":"knockout_match_1","remaining_sec.1":"remaining_sec_1","distance_of_shot.1":"distance_of_shot_1"},inplace=True)
In [78]:
df['is_goal'].isna().sum()
Out[78]:
6268
In [79]:
df['is_goal'].fillna(value='Unknown',inplace=True)
In [80]:
df['is_goal'] = df['is _goal'].astype("bool")
In [81]:
df
Out[81]:
location_x location_y remaining_min power_of_shot remaining_sec distance_of_shot is_goal area_of_shot shot_basics range_of_shot home_away lat_lng type_of_shot type_of_combined_shot remaining_min_1 power_of_shot_1 knockout_match_1 remaining_sec_1 distance_of_shot_1
0 167.0 72.0 10.0 1.0 27.0 38.0 True Right Side(R) Mid Range 16-24 ft. MANU @ POR 45.539131, -122.651648 shot - 30 NaN 10.00 1.00 50.608 54.2000 38.0
1 -157.0 0.0 10.0 1.0 22.0 35.0 False Left Side(L) Mid Range 8-16 ft. MANU @ POR 45.539131, -122.651648 shot - 45 NaN 10.00 1.00 28.800 22.0000 35.0
2 -101.0 135.0 7.0 1.0 45.0 36.0 True Left Side Center(LC) Mid Range 16-24 ft. NaN 45.539131, -122.651648 shot - 25 NaN 92.64 1.00 0.000 63.7216 54.4
3 138.0 175.0 6.0 1.0 52.0 42.0 False Right Side Center(RC) Mid Range 16-24 ft. MANU @ POR 45.539131, -122.651648 NaN shot - 3 NaN 1.00 122.608 52.0000 42.0
4 0.0 0.0 NaN 2.0 19.0 20.0 True Center(C) Goal Area Less Than 8 ft. MANU @ POR 45.539131, -122.651648 NaN shot - 1 42.64 2.00 0.000 19.0000 20.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
30692 1.0 48.0 6.0 4.0 5.0 24.0 False Center(C) NaN Less Than 8 ft. MANU vs. IND 42.982923, -71.446094 shot - 1 NaN 17.20 4.00 1.000 5.0000 24.0
30693 0.0 0.0 6.0 4.0 5.0 20.0 True Center(C) Goal Area Less Than 8 ft. MANU vs. IND 42.982923, -71.446094 shot - 49 NaN 6.00 64.36 1.000 5.0000 20.0
30694 -134.0 166.0 3.0 4.0 28.0 41.0 True Left Side Center(LC) Mid Range 16-24 ft. MANU vs. IND NaN NaN shot - 3 3.00 4.00 1.000 28.0000 41.0
30695 31.0 267.0 2.0 4.0 10.0 46.0 False Center(C) Penalty Spot NaN MANU vs. IND 42.982923, -71.446094 shot - 26 NaN 2.00 112.36 1.000 10.0000 46.0
30696 1.0 NaN 0.0 4.0 39.0 27.0 False Center(C) Goal Line Less Than 8 ft. MANU vs. IND 42.982923, -71.446094 shot - 45 NaN 0.00 4.00 1.000 39.0000 27.0

30697 rows × 19 columns

In [82]:
profile = pf.ProfileReport(df=df,explorative=True)
profile.to_file(output_file="Profiling_report.html")
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/pandas_profiling/model/correlations.py:55: UserWarning:

There was an attempt to calculate the cramers correlation, but this failed.
To hide this warning, disable the calculation
(using `df.profile_report(correlations={"cramers": {"calculate": False}})`
If this is problematic for your use case, please report this as an issue:
https://github.com/pandas-profiling/pandas-profiling/issues
(include the error message: 'No data; `observed` has size 0.')

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/pandas_profiling/model/correlations.py:120: RuntimeWarning:

invalid value encountered in greater_equal

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]
In [83]:
df['remaining_min'] = df['remaining_min'].astype("float")
df.describe()
Out[83]:
location_x location_y remaining_min power_of_shot remaining_sec distance_of_shot remaining_min_1 power_of_shot_1 knockout_match_1 remaining_sec_1 distance_of_shot_1
count 29236.000000 29157.000000 29135.000000 29211.000000 29103.000000 29130.000000 29162.000000 29158.000000 29204.000000 29158.000000 29129.000000
mean 7.383876 91.126933 4.883233 2.519359 28.329382 33.448884 18.204615 15.994109 16.599402 39.027303 38.801852
std 110.263049 87.676395 3.452533 1.153976 17.470663 9.369656 29.416973 29.676815 35.172016 29.835284 18.787711
min -250.000000 -44.000000 0.000000 1.000000 0.000000 20.000000 0.000000 1.000000 0.000000 0.000000 9.400000
25% -68.000000 4.000000 2.000000 1.000000 13.000000 25.000000 3.000000 2.000000 0.000000 17.000000 26.000000
50% 0.000000 74.000000 5.000000 3.000000 28.000000 35.000000 6.000000 3.000000 0.000000 35.000000 36.000000
75% 95.000000 160.000000 8.000000 3.000000 43.000000 41.000000 11.000000 4.000000 1.000000 52.000000 44.000000
max 248.000000 791.000000 11.000000 7.000000 59.000000 99.000000 128.761600 118.360000 141.352320 144.785600 115.728000

Analysis report from pandas profiler¶

  1. Goal shoot x and y location image.png
    Conclusion: Most of the goals are from the 0 location and thereafter location 1
    image-2.png
    For y most of the goals are from the 0 and 7 location
    2. Area of shot of ronaldo
    image-3.png Most of the shots are in the center location
    3. Most of the shots are made from the location of (42.98, -71.44)
    image-4.png 4. Most of the shots are of the type shot - 39
    image-5.png 5. Most of the combined shot is shot 3 image-6.png
  1. Most of the values of the power shot are in the range 1-5 </b> image.png 7. Most of the shot distance is 20 whereas on an average of 40 is perfect shot distance image-2.png
In [84]:
df.shape
Out[84]:
(30697, 19)
In [85]:
df.isna().sum()
Out[85]:
location_x                1461
location_y                1540
remaining_min             1562
power_of_shot             1486
remaining_sec             1594
distance_of_shot          1567
is_goal                      0
area_of_shot              1502
shot_basics               1575
range_of_shot             1564
home_away                 1497
lat_lng                   1565
type_of_shot             15280
type_of_combined_shot    15417
remaining_min_1           1535
power_of_shot_1           1539
knockout_match_1          1493
remaining_sec_1           1539
distance_of_shot_1        1568
dtype: int64
In [86]:
df.corr()
Out[86]:
location_x location_y remaining_min power_of_shot remaining_sec distance_of_shot is_goal remaining_min_1 power_of_shot_1 knockout_match_1 remaining_sec_1 distance_of_shot_1
location_x 1.000000 -0.014907 0.007889 -0.028939 0.001631 0.023483 -0.004652 0.002110 -0.000150 0.005408 0.006466 0.004695
location_y -0.014907 1.000000 -0.074579 0.041263 -0.050964 0.818727 -0.119565 -0.010678 -0.000933 0.009672 -0.022644 0.323209
remaining_min 0.007889 -0.074579 1.000000 -0.043992 0.026664 -0.058495 0.023144 0.098414 -0.004447 -0.010549 0.016655 -0.022925
power_of_shot -0.028939 0.041263 -0.043992 1.000000 0.003066 0.046184 -0.029246 -0.018268 0.026339 -0.003296 0.007004 0.023045
remaining_sec 0.001631 -0.050964 0.026664 0.003066 1.000000 -0.049875 0.025155 0.006628 0.002475 0.002258 0.464134 -0.018455
distance_of_shot 0.023483 0.818727 -0.058495 0.046184 -0.049875 1.000000 -0.159730 -0.006094 0.000784 0.006642 -0.025477 0.398123
is_goal -0.004652 -0.119565 0.023144 -0.029246 0.025155 -0.159730 1.000000 0.008287 -0.000745 0.001014 0.014997 -0.057310
remaining_min_1 0.002110 -0.010678 0.098414 -0.018268 0.006628 -0.006094 0.008287 1.000000 -0.000642 0.001780 0.019363 -0.012790
power_of_shot_1 -0.000150 -0.000933 -0.004447 0.026339 0.002475 0.000784 -0.000745 -0.000642 1.000000 -0.006052 0.006970 0.002725
knockout_match_1 0.005408 0.009672 -0.010549 -0.003296 0.002258 0.006642 0.001014 0.001780 -0.006052 1.000000 0.004459 -0.000589
remaining_sec_1 0.006466 -0.022644 0.016655 0.007004 0.464134 -0.025477 0.014997 0.019363 0.006970 0.004459 1.000000 -0.008911
distance_of_shot_1 0.004695 0.323209 -0.022925 0.023045 -0.018455 0.398123 -0.057310 -0.012790 0.002725 -0.000589 -0.008911 1.000000

Removing type_of_shot and type_of_combined_shot¶

Because it has highest nan values

In [87]:
df.drop(labels=['type_of_shot','type_of_combined_shot'],inplace=True,axis=1)
In [88]:
df
Out[88]:
location_x location_y remaining_min power_of_shot remaining_sec distance_of_shot is_goal area_of_shot shot_basics range_of_shot home_away lat_lng remaining_min_1 power_of_shot_1 knockout_match_1 remaining_sec_1 distance_of_shot_1
0 167.0 72.0 10.0 1.0 27.0 38.0 True Right Side(R) Mid Range 16-24 ft. MANU @ POR 45.539131, -122.651648 10.00 1.00 50.608 54.2000 38.0
1 -157.0 0.0 10.0 1.0 22.0 35.0 False Left Side(L) Mid Range 8-16 ft. MANU @ POR 45.539131, -122.651648 10.00 1.00 28.800 22.0000 35.0
2 -101.0 135.0 7.0 1.0 45.0 36.0 True Left Side Center(LC) Mid Range 16-24 ft. NaN 45.539131, -122.651648 92.64 1.00 0.000 63.7216 54.4
3 138.0 175.0 6.0 1.0 52.0 42.0 False Right Side Center(RC) Mid Range 16-24 ft. MANU @ POR 45.539131, -122.651648 NaN 1.00 122.608 52.0000 42.0
4 0.0 0.0 NaN 2.0 19.0 20.0 True Center(C) Goal Area Less Than 8 ft. MANU @ POR 45.539131, -122.651648 42.64 2.00 0.000 19.0000 20.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
30692 1.0 48.0 6.0 4.0 5.0 24.0 False Center(C) NaN Less Than 8 ft. MANU vs. IND 42.982923, -71.446094 17.20 4.00 1.000 5.0000 24.0
30693 0.0 0.0 6.0 4.0 5.0 20.0 True Center(C) Goal Area Less Than 8 ft. MANU vs. IND 42.982923, -71.446094 6.00 64.36 1.000 5.0000 20.0
30694 -134.0 166.0 3.0 4.0 28.0 41.0 True Left Side Center(LC) Mid Range 16-24 ft. MANU vs. IND NaN 3.00 4.00 1.000 28.0000 41.0
30695 31.0 267.0 2.0 4.0 10.0 46.0 False Center(C) Penalty Spot NaN MANU vs. IND 42.982923, -71.446094 2.00 112.36 1.000 10.0000 46.0
30696 1.0 NaN 0.0 4.0 39.0 27.0 False Center(C) Goal Line Less Than 8 ft. MANU vs. IND 42.982923, -71.446094 0.00 4.00 1.000 39.0000 27.0

30697 rows × 17 columns

Determining what are the locations where goal was done¶

Conclusion: If the y location exceeds 250 then chances of goal are getting reduced

In [89]:
df_loc = df[['location_x','location_y','is_goal']]
fig = px.scatter(df_loc, x="location_x", y="location_y", color="is_goal", hover_data=['is_goal'])
fig.show()

Determining relation between time and goal¶

Since there happens goal as well as no goal at particular time interval. It is difficult to determine

In [90]:
df_time = df[['remaining_min','remaining_sec','is_goal']]
fig = px.scatter(df_time, x="remaining_sec", y="is_goal", color="is_goal", hover_data=['is_goal'])
fig.show()
In [91]:
df_time = df[['remaining_min','remaining_sec','is_goal']]
fig = px.scatter(df_time, x="remaining_min", y="is_goal", color="is_goal", hover_data=['is_goal'])
fig.show()
In [92]:
df['power_of_shot'].unique()
Out[92]:
array([ 1.,  2.,  3., nan,  4.,  5.,  6.,  7.])

Relation between power of shot and goal¶

Most of the goals are in the range of 1-3 of powershot

In [93]:
px.box(df, x="power_of_shot", y="is_goal", points="all")

The best distance of goal is 20 to 40. Moreover, when distance exceeds 63 then chances of goal will be reduced

In [94]:
px.box(df, x="distance_of_shot", y="is_goal")

Relation between is goal and area of shot¶

  • Since both the values are very close for goal and not goal, it is not possible to make analysis
In [95]:
df[df['is_goal']][['area_of_shot']].value_counts().iplot(kind='bar')
In [96]:
df[df['is_goal']==False][['area_of_shot']].value_counts().iplot(kind='bar')

Area of shot for goals¶

For distance between 20 to 40 and power of shot between 1 to 4. The area of shot should be either center or left side or right side

In [97]:
t = df[df['area_of_shot'].notna()]
px.scatter(x='power_of_shot',y='distance_of_shot',color='area_of_shot',data_frame=t)
In [98]:
df[['range_of_shot','distance_of_shot']].groupby('range_of_shot').value_counts()
Out[98]:
range_of_shot    distance_of_shot
16-24 ft.        37.0                1332
                 36.0                1269
                 38.0                1265
                 39.0                1230
                 40.0                1101
                                     ... 
Less Than 8 ft.  22.0                 511
                 25.0                 486
                 23.0                 349
                 24.0                 333
                 28.0                   3
Length: 82, dtype: int64
In [99]:
df['range_of_shot'].unique()
Out[99]:
array(['16-24 ft.', '8-16 ft.', 'Less Than 8 ft.', '24+ ft.', nan,
       'Back Court Shot'], dtype=object)
In [100]:
temp = df[['range_of_shot','distance_of_shot']].copy()
temp.dropna()
Out[100]:
range_of_shot distance_of_shot
0 16-24 ft. 38.0
1 8-16 ft. 35.0
2 16-24 ft. 36.0
3 16-24 ft. 42.0
4 Less Than 8 ft. 20.0
... ... ...
30691 Less Than 8 ft. 20.0
30692 Less Than 8 ft. 24.0
30693 Less Than 8 ft. 20.0
30694 16-24 ft. 41.0
30696 Less Than 8 ft. 27.0

27655 rows × 2 columns

In [101]:
temp.groupby('range_of_shot').min()
Out[101]:
distance_of_shot
range_of_shot
16-24 ft. 36.0
24+ ft. 42.0
8-16 ft. 28.0
Back Court Shot 60.0
Less Than 8 ft. 20.0
In [102]:
temp.groupby('range_of_shot').max()
Out[102]:
distance_of_shot
range_of_shot
16-24 ft. 43.0
24+ ft. 65.0
8-16 ft. 36.0
Back Court Shot 99.0
Less Than 8 ft. 28.0
In [103]:
fig = px.scatter_matrix(df.iloc[::,3:8])
fig.update_layout(
    width=1500,
    height=1500,
    paper_bgcolor="LightSteelBlue",
)

Filling nan values in numeric data using KNN imputer¶

In [117]:
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=4)
temp_df = imputer.fit_transform(df[df.columns[np.where(df.dtypes == 'float64')]].copy())
In [120]:
temp_df = pd.DataFrame(temp_df,columns=df.columns[np.where(df.dtypes == 'float64')])

Finding the outliers¶

  • To find the outlier we first scale the data and plot the box plot.
  • For scaling, I am using the quantile transformer
  • Since the box plot have no datapoints beyond min and max of the boxes. Thus there are no outliers in the numeric data
In [121]:
df_scale = temp_df.copy()

from sklearn import preprocessing
df_scale = preprocessing.QuantileTransformer().fit_transform(df_scale)
df_scale = pd.DataFrame(df_scale,columns=df.columns[np.where(df.dtypes == 'float64')])
In [122]:
df_scale.iplot(kind='box')

Correlation of the numeric data¶

In [123]:
fig = px.imshow(df_scale.corr(),text_auto=True)
fig.update_layout(
    width=1000,
    height=1000,
    paper_bgcolor="LightSteelBlue",
)

Multicollinearity detection¶

  • There is no multicollinearity present based on pearson coeffecient

Finding important features using ANOVA analysis¶

In [124]:
from sklearn.feature_selection import f_classif
In [126]:
Y = df['is_goal'].values.reshape(-1,1)
X = df_scale
f,p = f_classif(X,Y)
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

In [129]:
df_scale.columns
Out[129]:
Index(['location_x', 'location_y', 'remaining_min', 'power_of_shot',
       'remaining_sec', 'distance_of_shot', 'remaining_min_1',
       'power_of_shot_1', 'knockout_match_1', 'remaining_sec_1',
       'distance_of_shot_1'],
      dtype='object')
In [136]:
p
Out[136]:
array([1.35970442e-001, 9.56198300e-098, 6.35356916e-006, 4.81045742e-007,
       1.39147234e-006, 1.54081332e-168, 1.14017425e-004, 2.32936418e-003,
       8.97978807e-001, 2.02747370e-004, 3.72790021e-076])
In [134]:
px.bar(f)
In [239]:
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn import metrics
from sklearn.metrics import auc


def apply_f_classif(x, y, k):
    
    select_features = SelectKBest(f_classif, k = k)
    x_new = select_features.fit_transform(x, y)
        
    return pd.DataFrame(x_new)
In [240]:
def logistic_fn(x_train, y_train):
    
    model = LogisticRegression(solver = 'saga',)
    model.fit(x_train, y_train)
    
    return model
In [241]:
result_dict = {}
from sklearn.model_selection import train_test_split
In [242]:
def build_model(Y, 
                features, 
                X,
                preprocess_fn,
                *hyperparameters):
    

    X = preprocess_fn(X, Y, *hyperparameters)
    
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    
    model = logistic_fn(x_train, y_train)
    
    y_pred = model.predict(x_test)
    
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=2)
    print(fpr,tpr)
    
    acc = metrics.roc_auc_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    return {'accuracy': acc, 
            'precision' : prec,
            'recall' : recall}
In [243]:
FEATURES = list(df_scale.columns[:-1])
In [245]:
result_dict = {}
    
for i in range (1, 12):
    result_dict['f_classif - ' + str(i)] = build_model(Y,
                                                       FEATURES,
                                                       X,
                                                       apply_f_classif,
                                                       i)
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

[0.         0.71482085 1.        ] [nan nan nan]
[0.         0.70553746 1.        ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

[0.         0.70749186 1.        ] [nan nan nan]
[0.         0.72166124 1.        ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

[0.         0.72459283 1.        ] [nan nan nan]
[0.        0.7247557 1.       ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

[0.         0.71710098 1.        ] [nan nan nan]
[0.         0.71726384 1.        ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

[0.         0.73355049 1.        ] [nan nan nan]
[0.         0.72345277 1.        ] [nan nan nan]
[0.        0.7267101 1.       ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning:

No positive samples in y_true, true positive value should be meaningless

In [249]:
li = []
def compare_results(result_dict):
    
    for key in result_dict:
        print('Test: ', key)

        print()
        li.append(result_dict[key]['accuracy'])
        print("accuracy_score : ", result_dict[key]['accuracy'])
        print("precision_score : ", result_dict[key]['precision'])
        print("recall_score : ", result_dict[key]['recall'])
        print()
        
compare_results(result_dict)
Test:  f_classif - 1

accuracy_score :  0.5365195451448821
precision_score :  0.5862383230804283
recall_score :  0.7468795355587808

Test:  f_classif - 2

accuracy_score :  0.545386405803008
precision_score :  0.5937211449676824
recall_score :  0.7452912199362504

Test:  f_classif - 3

accuracy_score :  0.5440874613027396
precision_score :  0.6010589318600368
recall_score :  0.7453611190408221

Test:  f_classif - 4

accuracy_score :  0.5382528095567639
precision_score :  0.5820356578650417
recall_score :  0.7556401992382069

Test:  f_classif - 5

accuracy_score :  0.5342773761095279
precision_score :  0.5740615868734547
recall_score :  0.7553978112984324

Test:  f_classif - 6

accuracy_score :  0.542633584963786
precision_score :  0.5887640449438202
recall_score :  0.762292697119581

Test:  f_classif - 7

accuracy_score :  0.5402292912741782
precision_score :  0.5870997047467635
recall_score :  0.75254730713246

Test:  f_classif - 8

accuracy_score :  0.5491327495086207
precision_score :  0.5994550408719346
recall_score :  0.7599309153713298

Test:  f_classif - 9

accuracy_score :  0.5421240303289414
precision_score :  0.5814831261101243
recall_score :  0.7712014134275619

Test:  f_classif - 10

accuracy_score :  0.547833967626939
precision_score :  0.590274651058082
recall_score :  0.7657710280373832

Test:  f_classif - 11

accuracy_score :  0.5456807518388342
precision_score :  0.5844912595248767
recall_score :  0.7675103001765744

In [251]:
px.line(li)
In [261]:
from sklearn.feature_selection import SelectKBest
select_features = SelectKBest(f_classif, k = 7)
X_new = select_features.fit_transform(X, Y)
X_new = pd.DataFrame(X_new)

selected_features = []

for i in range(len(X_new.columns)):
    for j in range(len(X.columns)):
        
        if(X_new.iloc[:,i].equals(X.iloc[:,j])):
            selected_features.append(X.columns[j])
            
selected_features
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

Out[261]:
['location_y',
 'remaining_min',
 'power_of_shot',
 'remaining_sec',
 'distance_of_shot',
 'remaining_min_1',
 'distance_of_shot_1']
In [ ]: